# coding:utf-8
# 作者：理想国真恵玩
import pandas as pd
import numpy as np
import matplotlib.pyplot as  plt

# Load the movie data set and plot how many movies fall into each genre.
path = "../data/movie.csv"
df = pd.read_csv(path)
print(df.columns)

# Each "Genre" cell is a comma-separated string; split it into a list per row.
temp_list = [genres.split(",") for genres in df["Genre"]]
# Sorted, de-duplicated set of every genre label that appears in the data.
genre_list = np.unique([genre for genres in temp_list for genre in genres])

# Indicator table: one row per movie, one zero-initialised column per genre.
temp_df = pd.DataFrame(
    np.zeros([df.shape[0], genre_list.shape[0]]),
    columns=genre_list,
)

# Mark the genres of every movie with 1.
# Iterate over the actual rows rather than a hard-coded count so the script
# works for any file length, and use `.loc` because `.ix` was removed from
# pandas. `temp_df` has the default RangeIndex, so the positional row number
# is also its label.
for row, genres in enumerate(temp_list):
    # genres looks like ['Action', 'Adventure', 'Animation']
    temp_df.loc[row, genres] = 1

# Column sums are the per-genre movie counts; show them largest first.
genre_counts = temp_df.sum().sort_values(ascending=False)
genre_counts.plot(kind="bar", figsize=(20, 8), fontsize=20, colormap="cool")
plt.show()
